from cn.feng.spider import html_parser, html_outputer, url_manger, html_downLoader


class SpiderMain(object):
    """Drive a crawl by wiring together the URL manager, downloader, parser and outputer.

    The four collaborators are created in ``__init__`` from the project's
    ``url_manger``, ``html_downLoader``, ``html_parser`` and ``html_outputer``
    modules; ``craw`` runs the crawl loop over them.
    """

    def __init__(self):
        self.urls = url_manger.UrlManager()
        self.downloader = html_downLoader.HtmlDownLoader()
        self.parser = html_parser.HtmlParser()
        self.output = html_outputer.HtmlOutputer()

    def craw(self, root_url, max_count=3):
        """Crawl pages starting at *root_url*, stopping after *max_count* attempts.

        Each iteration takes one URL from the URL manager, downloads it,
        parses it, downloads the image nodes returned by the parser, queues
        the newly found URLs and hands the parsed data to the outputer.

        Args:
            root_url: URL added to the URL manager before the loop starts.
            max_count: Maximum number of crawl attempts (successful or not).
                The limit is checked after each attempt, so at least one
                attempt is made whenever a URL is pending. Defaults to 3.

        Returns:
            None. Progress and failures are reported with ``print``.
        """
        attempted = 0
        succeeded = 0
        self.urls.add_new_url(root_url)
        while self.urls.has_new_url():
            # Count the attempt up front so that a failure anywhere in the
            # body (including fetching the URL itself) still advances the cap.
            attempted += 1
            try:
                new_url = self.urls.get_new_url()
                print("craw %d : %s" % (attempted, new_url))
                html_content = self.downloader.downLoad(new_url)
                new_urls, new_data, image_nodes = self.parser.parse(
                    new_url, html_content
                )
                self.downloader.img_download(image_nodes)
                self.urls.add_new_urls(new_urls)
                self.output.collect_data(new_data)
                # TODO: HTML output is broken; re-enable once fixed.
                # self.output.output_Html()
                succeeded += 1
            except Exception as e:
                # Best-effort crawl: report the failure and move on to the
                # next pending URL instead of aborting the whole run.
                print("craw failed!\n" + str(e))
            # The cap is enforced outside the try block so that a failed
            # attempt cannot skip it and let the crawl overrun the limit.
            if attempted >= max_count:
                print("成功爬完%d条url" % succeeded)
                break


if __name__ == "__main__":
    # Script entry point: build a spider and crawl from the hard-coded start URL.
    start_page = "http://www.tooopen.com/img/89_874.aspx"
    SpiderMain().craw(start_page)

